Guided project: analysing survey data on the Star Wars movies
In [1]:
import pandas as pd
# Read the survey responses into a DataFrame. The encoding is passed
# explicitly instead of relying on the read_csv default.
star_wars = pd.read_csv(
    "star_wars.csv",
    encoding="ISO-8859-1",
)
In [3]:
# Keep only the rows that have a RespondentID.
# The explicit .copy() makes `star_wars` an independent frame rather than a
# filtered slice of the loaded data, so the column assignments in the cells
# below do not trigger pandas' SettingWithCopyWarning.
has_respondent_id = pd.notnull(star_wars["RespondentID"])
star_wars = star_wars[has_respondent_id].copy()
star_wars.head(10)
Out[3]:
In [4]:
# Display the column labels; the cells below refer to columns both by label
# and by position.
star_wars.columns
Out[4]:
In [5]:
# Turn the two Yes/No survey questions into booleans. Series.map leaves any
# answer that is not a key of `yes_no` (including missing answers) as NaN.
yes_no = {
    "Yes": True,
    "No": False,
}
yes_no_columns = (
    "Have you seen any of the 6 films in the Star Wars franchise?",
    "Do you consider yourself to be a fan of the Star Wars film franchise?",
)
for col in yes_no_columns:
    star_wars[col] = star_wars[col].map(yes_no)

star_wars.head(3)
Out[5]:
In [6]:
import numpy as np

# Convert the six "which films have you seen" columns (positions 3-8) to
# booleans. Each of these columns is expected to hold the film title when the
# respondent selected that film and NaN otherwise, so "is not null" is the
# seen / not-seen flag. Testing for null instead of looking up the exact title
# text means a title that differs in spacing or encoding can no longer turn
# into a silent NaN that .sum() would skip.
for col in star_wars.columns[3:9]:
    if star_wars[col].dtype == bool:
        # Already converted: re-running the cell must not flip every value
        # to True (False is "not null" as well).
        continue
    star_wars[col] = star_wars[col].notna()
In [7]:
# Give the six "seen" columns short labels: seen_1 ... seen_6.
seen_labels = {
    "Which of the following Star Wars films have you seen? Please select all that apply.": "seen_1",
}
# The other five rename targets are "Unnamed: 4" ... "Unnamed: 8",
# which become seen_2 ... seen_6.
for film_no in range(2, 7):
    seen_labels["Unnamed: {}".format(film_no + 2)] = "seen_{}".format(film_no)

star_wars = star_wars.rename(columns=seen_labels)
star_wars.head(3)
Out[7]:
In [8]:
# Give the six ranking columns short labels: ranking_1 ... ranking_6.
ranking_old_labels = [
    "Please rank the Star Wars films in order of preference with 1 being your favorite film in the franchise and 6 being your least favorite film.",
    "Unnamed: 10",
    "Unnamed: 11",
    "Unnamed: 12",
    "Unnamed: 13",
    "Unnamed: 14",
]
ranking_new_labels = ["ranking_{}".format(film_no) for film_no in range(1, 7)]

# Pair each old label with its new one, in order.
star_wars = star_wars.rename(columns=dict(zip(ranking_old_labels, ranking_new_labels)))
star_wars.head(3)
Out[8]:
In [9]:
# Cast the six ranking columns (positions 9-14) to float so they can be
# averaged numerically.
ranking_columns = star_wars.columns[9:15]
star_wars[ranking_columns] = star_wars[ranking_columns].astype(float)
In [10]:
# Mean of each of the six ranking columns (positions 9-14).
ranking_columns = star_wars.columns[9:15]
star_wars.loc[:, ranking_columns].mean()
Out[10]:
In [11]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.bar(range(6), star_wars[star_wars.columns[9:15]].mean())
Out[11]:
In [12]:
# Column-wise sum of the six "seen" columns (positions 3-8).
seen_columns = star_wars.columns[3:9]
star_wars.loc[:, seen_columns].sum()
Out[12]:
In [13]:
# Column-wise sum of the six "seen" columns (positions 3-8); with boolean
# columns each True counts as 1, so this is the number of respondents who
# marked the film as seen.
seen_counts = star_wars[star_wars.columns[3:9]].sum()

fig, ax = plt.subplots()
# Label each bar with its column name instead of the bare positions 0-5.
ax.bar(
    range(len(seen_counts)),
    seen_counts,
    tick_label=seen_counts.index.tolist(),
)
ax.set_title("Respondents who have seen each film")
ax.set_xlabel("Seen column")
ax.set_ylabel("Number of respondents")
plt.show()
Out[13]:
In [ ]: